Regression Analysis on Climate Data

1 Importing Necessary Libraries

import pandas as pd
import plotly.express as px

2 Load the dataset

# Read the historic NABR records from disk and discard the 'TimePeriod' and
# 'RCP' columns in the same expression.
csv_path = './processed_data/NABR_historic.csv'
dropped_columns = ['TimePeriod', 'RCP']
df = pd.read_csv(csv_path).drop(columns=dropped_columns)

# Preview the first five rows (rendered as the cell output in a notebook).
df.head()
long lat year scenario treecanopy Ann_Herb Bare Herb Litter Shrub ... PPT_Annual T_Winter T_Summer T_Annual Tmax_Summer Tmin_Winter VWC_Winter_whole VWC_Spring_whole VWC_Summer_whole VWC_Fall_whole
0 -110.0472 37.60413 1980 sc1 0 0 84 5 11 7 ... 13.79 0.964835 23.15924 23.159240 37.05 NaN NaN NaN NaN NaN
1 -110.0472 37.60413 1980 sc1 0 0 84 5 11 7 ... 2.69 0.964835 23.15924 0.964835 37.05 NaN NaN NaN NaN NaN
2 -110.0472 37.60413 1980 sc1 0 0 84 5 11 7 ... 13.79 0.964835 23.15924 0.964835 37.05 NaN NaN NaN NaN NaN
3 -110.0472 37.60413 1980 sc1 0 0 84 5 11 7 ... 2.69 0.964835 23.15924 23.159240 37.05 NaN NaN NaN NaN NaN
4 -110.0472 37.60413 1980 sc1 0 0 84 5 11 7 ... NaN NaN NaN NaN NaN -12.45 0.113447 0.096831 0.041876 0.052298

5 rows × 27 columns

2.1 Missing Values and Data Types

import seaborn as sns
import matplotlib.pyplot as plt

# Missing-value overview: one heatmap cell per (row, column), coloured by
# whether the value is null.
# The location, year, scenario and vegetation-cover columns listed below are
# left out of the plot.
# NOTE(review): 'Herb' is not in the exclusion list although the other
# vegetation columns are -- confirm whether that is intentional.
excluded_columns = [
    'lat', 'long', 'year', 'scenario',
    'treecanopy', 'Ann_Herb', 'Bare', 'Litter', 'Shrub',
]

plt.figure(figsize=(15, 10))
missing_mask = df.drop(columns=excluded_columns).isnull()
sns.heatmap(missing_mask, cbar=False, cmap='viridis')
plt.title('Missing Data Heatmap', fontsize=16, fontweight='bold', loc='left')
plt.xticks(rotation=45)  # tilt the column names on the x-axis
plt.show()

3 Data Preprocessing

# Interactive map of the sampling locations ('lat' / 'long' columns of df).
#
# The dataset repeats the same coordinates on many rows (the preview above
# shows identical 'lat'/'long' values on consecutive records), so only the
# distinct coordinate pairs are plotted. The markers land on exactly the same
# spots, but the figure carries one point per site instead of one per row.
site_locations = df[['lat', 'long']].drop_duplicates()

fig = px.scatter_geo(site_locations,
                     lat='lat',
                     lon='long',
                     scope='usa',
                     title='Utah National Parks Geographic Overview')
# Override the default projection for the map.
fig.update_geos(projection_type="natural earth")
fig.update_layout(showlegend=True)
fig.show()

4 EDA

import pandas as pd
import plotly.graph_objects as go
import plotly.offline as pyo

# Initialise Plotly's offline notebook mode before any figures are shown.
pyo.init_notebook_mode(connected=True)

# Engineered features, added to df in place:
#   Annual_Soil_Moisture  - row-wise mean of the four seasonal VWC columns
#                           (pandas' mean skips NaN values by default)
#   Annual_Temperature    - copy of 'T_Annual'
#   Annual_Precipitation  - copy of 'PPT_Annual'
seasonal_vwc_columns = [
    'VWC_Winter_whole',
    'VWC_Spring_whole',
    'VWC_Summer_whole',
    'VWC_Fall_whole',
]
df['Annual_Soil_Moisture'] = df[seasonal_vwc_columns].mean(axis=1)

renamed_copies = (
    ('Annual_Temperature', 'T_Annual'),
    ('Annual_Precipitation', 'PPT_Annual'),
)
for new_name, source_name in renamed_copies:
    df[new_name] = df[source_name]

vegetation_variables = ['treecanopy', 'Ann_Herb', 'Bare', 'Herb', 'Litter', 'Shrub']

# Each entry: (df column on the x-axis, label used in the title, x-axis label).
climate_drivers = [
    ('Annual_Precipitation', 'Annual Precipitation', 'Annual Precipitation (mm)'),
    ('Annual_Temperature', 'Annual Temperature', 'Annual Temperature (°C)'),
    ('Annual_Soil_Moisture', 'Annual Soil Moisture', 'Annual Soil Moisture'),
]

# Draw one scatter plot per (vegetation variable, climate driver) pair.
# For every vegetation variable the plots appear in the order precipitation,
# temperature, soil moisture; each plot gets its own figure.
for veg_var in vegetation_variables:
    for x_column, driver_label, x_axis_label in climate_drivers:
        plt.figure(figsize=(8, 5.5))
        sns.scatterplot(data=df, x=x_column, y=veg_var, color='#E8B989')
        plt.title(f'{driver_label} vs {veg_var}')
        plt.xlabel(x_axis_label)
        plt.ylabel(f'{veg_var} Coverage')
        plt.show()